library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(scales)
library(leaflet)
library(DT)

ggplot2 is great! It can get you to publication-quality figures without much code. You shouldn’t have to be doing much in Illustrator.

Read in and tidy data

This script reads in the file we need. The base R function tryCatch() evaluates its first expression and, if that expression raises an error, runs the error handler instead. Here it first tries to read the file from the local data directory; if that fails, it prints a message saying the file will be fetched from the KNB and then reads it from the URL stored in data_url.

# Public KNB download link for the escapement table; only used when the local
# copy cannot be read.
data_url <- "https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn%3Auuid%3Af119a05b-bbe7-4aea-93c6-85434dcb1c5e"

# Try the local copy first. Any error raised while reading it (normally a
# missing file) is handled by reading the same table from `data_url`.
esc <- tryCatch(
  read.csv("data/escapement.csv", stringsAsFactors = FALSE),
  error = function(cond) {
    message("Escapement file does not seem to exist, so get it from the KNB.")
    # The handler's value becomes the value of tryCatch(), and so of `esc`.
    read.csv(url(data_url, method = "libcurl"), stringsAsFactors = FALSE)
  }
)
## Warning in file(file, "rt"): cannot open file 'data/escapement.csv': No such
## file or directory
## Escapement file does not seem to exist, so get it from the KNB.
# Preview the first six rows to check the columns that were read in.
head(esc, n = 6L)
##        Location SASAP.Region sampleDate Species DailyCount  Method Latitude
## 1 Akalura Creek       Kodiak 1930-05-24 Sockeye          4 Unknown  57.1641
## 2 Akalura Creek       Kodiak 1930-05-25 Sockeye         10 Unknown  57.1641
## 3 Akalura Creek       Kodiak 1930-05-26 Sockeye          0 Unknown  57.1641
## 4 Akalura Creek       Kodiak 1930-05-27 Sockeye          0 Unknown  57.1641
## 5 Akalura Creek       Kodiak 1930-05-28 Sockeye          0 Unknown  57.1641
## 6 Akalura Creek       Kodiak 1930-05-29 Sockeye          0 Unknown  57.1641
##   Longitude Source
## 1 -154.2287   ADFG
## 2 -154.2287   ADFG
## 3 -154.2287   ADFG
## 4 -154.2287   ADFG
## 5 -154.2287   ADFG
## 6 -154.2287   ADFG

Calculate annual escapement from daily escapement.

# Total the daily counts into one escapement value per region, species and
# year, for the five salmon species only.
annual_esc <- esc %>%
  # Drop the other species before aggregating so their daily counts are never
  # summed; %in% is simpler than chaining == comparisons with |.
  filter(Species %in% c("Chinook", "Sockeye", "Pink", "Chum", "Coho")) %>%
  # lubridate::year() pulls the year out of the sampleDate values.
  # Alternative that splits the date into separate columns instead:
  # separate(col = sampleDate, into = c("Year", "Month", "Day"), sep = "-", remove = TRUE) %>%
  mutate(Year = lubridate::year(sampleDate)) %>%
  group_by(SASAP.Region, Species, Year) %>%
  # summarise() drops only the innermost grouping (Year); the result stays
  # grouped by SASAP.Region and Species.
  summarise(escapement = sum(DailyCount))

# Preview the first six rows of the annual totals.
head(annual_esc, n = 6L)
## # A tibble: 6 x 4
## # Groups:   SASAP.Region, Species [1]
##   SASAP.Region                          Species  Year escapement
##   <chr>                                 <chr>   <dbl>      <int>
## 1 Alaska Peninsula and Aleutian Islands Chinook  1974       1092
## 2 Alaska Peninsula and Aleutian Islands Chinook  1975       1917
## 3 Alaska Peninsula and Aleutian Islands Chinook  1976       3045
## 4 Alaska Peninsula and Aleutian Islands Chinook  1977       4844
## 5 Alaska Peninsula and Aleutian Islands Chinook  1978       3901
## 6 Alaska Peninsula and Aleutian Islands Chinook  1979      10463

Make some static plots

Every ggplot starts with a call to ggplot(). Its first two arguments are data and mapping (how you specify which columns in the data frame you want to map to which axes). You almost never see data = and mapping = written out in ggplot2 code, since these are known to be the first two arguments. You start with the ggplot function and then add things to it. First, you’ll want to add the plot geometry (the type of plot to draw).

Here we have a column plot.

# Column plot of escapement by species. Rows that share a species are stacked
# into a single bar.
ggplot(
  data = annual_esc,
  mapping = aes(x = Species, y = escapement)
) +
  geom_col()

To change the color of the bars, you can use the fill argument…

# Deliberate pitfall: inside aes(), "blue" is treated as data (a constant
# category), not as a colour name, so the bars are filled from the default
# palette instead of being drawn blue.
ggplot(
  data = annual_esc,
  mapping = aes(x = Species, y = escapement, fill = "blue")
) +
  geom_col()

But it made the bars red! When we specified fill inside the aes function, ggplot created a dummy column containing the word “blue” in every single row, then mapped that variable to the fill aesthetic using the default color palette.

If you want to change the color/size/etc in relation to a given variable in your dataframe, you do want to put it in the aes argument.

# Mapping fill to a column inside aes() colours each bar segment by region and
# adds a legend for it.
ggplot(
  data = annual_esc,
  mapping = aes(
    x = Species,
    y = escapement,
    fill = SASAP.Region
  )
) +
  geom_col()

You can also specify the aes() mapping in the geom_ function and get the same result…

# Same plot, but the aesthetic mapping is supplied to the geom rather than to
# ggplot().
ggplot(data = annual_esc) +
  geom_col(
    mapping = aes(
      x = Species,
      y = escapement,
      fill = SASAP.Region
    )
  )

OR move both the data and the mapping into the geom_ call.

# Same plot again, with both the data and the mapping supplied to the geom;
# ggplot() itself is called with no arguments.
ggplot() +
  geom_col(
    data = annual_esc,
    mapping = aes(
      x = Species,
      y = escapement,
      fill = SASAP.Region
    )
  )

This can be useful when you are bringing multiple data layers (e.g., spatial data) into the same plot. HOWEVER, it’s generally a good idea to put the data and mapping arguments in the ggplot function so that you don’t overwrite things with the geom calls.

But in this case, if we just want all of the bars to be blue, we can move fill out of aes() and set it directly in the geom_col function.

# A fill given to the geom outside aes() is a fixed colour applied to every
# bar, so no legend is created.
ggplot(
  data = annual_esc,
  mapping = aes(x = Species, y = escapement)
) +
  geom_col(fill = "blue")

We can also filter the data before putting it into ggplot. This doesn’t actually store the filtered data in its own dataframe. This kind of syntax is great if you want to run the same thing for every region… can then just use a loop!

# Filter on the fly and pipe the result straight into ggplot(); the filtered
# rows are never saved as their own object.
annual_esc %>%
  filter(SASAP.Region == "Kodiak") %>%
  ggplot(
    mapping = aes(x = Year, y = escapement, color = Species)
  ) +
  geom_line() +
  geom_point()

Or the base R version with subset, instead of filter…

# Base R equivalent: subset() picks the Kodiak rows inside the ggplot() call.
ggplot(
  subset(annual_esc, SASAP.Region == "Kodiak"),
  aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point()

But for ease, let’s save the Kodiak region in its own dataframe.

# Keep the Kodiak rows in their own data frame so later plots can reuse them.
kodiak_esc <- filter(annual_esc, SASAP.Region == "Kodiak")

# Yearly escapement for Kodiak, one coloured line (with points) per species.
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point()

You can change the axis labels with the labs function, OR, if you only want to change some of the labels, you can use ylab, xlab, etc.

# Same Kodiak plot with a custom y-axis label and a title; the x-axis keeps its
# default label.
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  ylab("Escapement (num fish)") +
  ggtitle("Kodiak Salmon Escapement")

The theme_ functions provide a number of complete default themes. The theme function has a lot of helpful arguments for changing many aspects of the figure’s appearance. As a note, if you are using one of the ggplot themes and then making modifications, you need to call them in that order: the theme_ function first, then theme(). Otherwise, the ggplot theme will overwrite all of your theme modifications.

# theme_bw() is applied first and theme() afterwards, so the legend tweaks
# below are not overwritten by the complete theme.
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  ylab("Escapement (num fish)") +
  ggtitle("Kodiak Salmon Escapement") +
  theme_bw() +
  theme(
    legend.position = "bottom",  # legend below the panel
    legend.title = element_blank()  # no legend title
  )

If you like a theme, you can save theme calls to their own object and add to end of ggplot call. This is very handy if you are making multiple plots with the same theme and don’t want to repeat yourself.

# Reusable theme: black-and-white base, legend at the bottom, no legend title.
my_theme <- theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )

# Adding the saved theme object works like adding the theme calls themselves.
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  ylab("Escapement (num fish)") +
  ggtitle("Kodiak Salmon Escapement") +
  my_theme

If we want to fix the y-axis so it isn’t in scientific notation anymore… The scales package is great for reformatting. Can also use to fix percentages, etc.

# comma (from the scales package) is used as the y-axis label formatter, so the
# tick labels are written with thousands separators rather than in scientific
# notation.
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = comma) +
  ylab("Escapement (num fish)") +
  ggtitle("Kodiak Salmon Escapement") +
  my_theme

cowplot is a great package for creating panels of multiple ggplot figures - can arrange legends, axes, etc.

Or we can use facet_wrap for a certain subset of cases… Can quickly stitch multiple plots together. If you want different axes for each one, you can free them with the scales = argument.

# One panel per region. scales = "free_y" lets every panel choose its own
# y-axis range, while the x-axis (Year) stays shared.
ggplot(
  data = annual_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = comma) +
  facet_wrap(~SASAP.Region, scales = "free_y") +
  my_theme

Make some maps

# One row per distinct Location/Latitude/Longitude combination; drop_na() then
# removes any row that contains a missing value.
locations <- distinct(esc, Location, Latitude, Longitude) %>%
  drop_na()

# Show the sites as an interactive data table.
datatable(data = locations)
# Static preview of the first six rows.
head(locations, n = 6L)
##         Location Latitude Longitude
## 1  Akalura Creek  57.1641 -154.2287
## 4    Anvik River  62.7000 -160.2000
## 6 Ayakulik River  57.1956 -154.5290
## 7     Bear Creek  56.0389 -160.2734
## 8     Bear River  56.0389 -160.2734
## 9  Big Bay Creek  58.5426 -152.5683

Leaflet generates interactive maps. It works similarly to ggplot - but unlike ggplot, it takes pipes instead of plus signs.

# Basic interactive map: default base tiles plus one marker per site.
# Each ~column is a one-sided formula that leaflet evaluates against
# `locations`, so the argument takes its values from that column.
leaflet(data = locations) %>%
  addTiles() %>%
  addMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    popup = ~Location
  )

To make this a little bit prettier…

# Nicer version of the site map: GEBCO imagery served over WMS as the base
# layer, with each site drawn as a small salmon-coloured circle outlined in
# white. Clicking a circle shows the site name.
leaflet(locations) %>%
  # NOTE(review): the attribution text names the GEBCO_2014 grid while the
  # requested layer is GEBCO_LATEST; confirm the wording is still correct.
  addWMSTiles(
    "https://www.gebco.net/data_and_products/gebco_web_services/web_map_service/mapserv?",
    layers = "GEBCO_LATEST",
    attribution = "Imagery reproduced from the GEBCO_2014 Grid, version 20150318, www.gebco.net"
  ) %>%
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    popup = ~Location,
    radius = 5,
    # set fill properties
    fillColor = "salmon",
    fillOpacity = 1,
    # set stroke properties
    stroke = TRUE,  # spelled out: T is an ordinary variable that can be reassigned
    weight = 0.5,
    color = "white",
    opacity = 1
  )

It’s also possible to have images, etc. in the pop-ups. The popups take html code so if you have an image hosted somewhere you can add it in there. popup = "<img src = 'https://urlhere.png' width='20%'/>" And if you want a different image for each of the points, can specify the URLs somewhere in the data frame.